(** CDBs store (bytes * bytes) with no other preconditions.
 * http://cr.yp.to/cdb/cdb.txt
 *
 * It has 3 secions: pointers, data and hashes.
 *
 * when
 * - adding tuples, you append them to the data section and store the hash and position in a map.
 * - removing tuples, you copy all others to a new cdb.
 * - replacing, you first remove and then add
 *
 * Basic operations:
 * - fold_left data tuples
*)

(* No further 31 bit overfow checks *)
assert (Sys.int_size >= 32);

(** http://cr.yp.to/cdb/cdb.txt *)
module H = struct
  let ( + )    = Int.add
  let ( << )   = Int.shift_left
  let ( ^ )    = Int.logxor
  let ( land ) = Int.logand

  let foldr h c =
    let c = c |> Char.code in
    (((h << 5) + h) ^ c) land 0xFFffFFff
end

let hash byt =
  byt |> Bytes.fold_left H.foldr 5381

(** 4 little endian bytes as an unsigned int.
*)
let read4b a o =
  assert (o >= 0);
  let ( << ) = Int.shift_left
  and ( || ) = Int.logor in
  let b i = a.{o + i} << 8 * i in
  b 0 || b 1 || b 2 || b 3

(** copy bytes into a new buffer. *)
let blit a o l =
  assert (o >= 0);
  assert (l >= 0);
  let r = l |> Bytes.create in
  for i = 0 to pred l do
    Bytes.set_int8 r i a.{o + i}
  done;
  r

(** read one k-v tuple from the data section. Could return lazy function to save
 *  reads (and allocs) in edge-cases. *)
let read_record a o =
  let kl = read4b a o in
  let dl = read4b a (o+4) in
  blit a (o+8) kl,
  blit a (o+8+kl) dl

let map fn =
  let open Bigarray in
  let fd = Unix.openfile fn [ Unix.O_RDONLY ] 0 in
  let mf = Unix.map_file fd int8_unsigned c_layout false [| -1 |] in
  let arr = mf |> array1_of_genarray in
  fd,arr

(** fold_left over all k-v records *)
let fold_left f init fn =
  let fd,arr = map fn in
  let h0 = 0 |> read4b arr in
  let p = ref 2048 in
  let r = ref init in
  while !p < h0 do
    let k,v = read_record arr !p in
    r := f !r (!p,(k,v));
    p := !p + 8 + Bytes.length k + Bytes.length v
  done;
  fd |> Unix.close;
  !r

(** fold_left over all values for the key *)
let fold_left_key key f init fn =
  let fd,arr = map fn in
  let h = key |> hash in
  let p = h mod 256 in
  let pos = read4b arr (p * 8)
  and slots = read4b arr (p * 8 + 4) in
  let slot = ref ((h / 256) mod slots) in
  let init = ref init in
  let loop = ref true in
  while !loop do
    let ha = read4b arr (pos + !slot*8) in
    let po = read4b arr (pos + !slot*8 + 4) in
    (if ha |> Int.equal h
     then
       let k,v = po |> read_record arr in
       if k |> Bytes.equal key
       then
         init := f !init v );
    slot := succ !slot;
    loop := po != 0
  done;
  fd |> Unix.close;
  !init

(** there may be multiple k-v tuples with the same key *)
let find_first_key key fn : bytes option =
  let fd,arr = map fn in
  let h = key |> hash in
  let p = h mod 256 in
  let pos = read4b arr (p * 8)
  and slots = read4b arr (p * 8 + 4) in
  let slot = ref ((h / 256) mod slots) in
  let init = ref None in
  let loop = ref true in
  while !loop do
    let ha = read4b arr (pos + !slot*8) in
    let po = read4b arr (pos + !slot*8 + 4) in
    loop := po != 0;
    (if ha |> Int.equal h
     then
       (* we may read v but not use it. We could use lazy functions. *)
       let k,v = po |> read_record arr in
       if k |> Bytes.equal key
       then (
         loop := false;
         init := Some v ));
    slot := succ !slot;
  done;
  fd |> Unix.close;
  !init

(** *)
let truncate fn =
  (* if nonex or size < 256*8: bootstrap *)
  let fd,arr = map fn in
  let h0 = 0 |> read4b arr in (* end of data section *)
  Unix.close fd;
  Unix.truncate fn h0

let write4ble ch n =
  let ( >> ) = Int.shift_right in
  let b i = n >> i*8 |> output_byte ch in
  b 0; b 1; b 2; b 3

(** fn must be truncated before *)
let append f init (k,v) fn =
  let write_record ch k v =
    k |> Bytes.length |> write4ble ch;
    v |> Bytes.length |> write4ble ch;
    output_bytes ch k;
    output_bytes ch v
  in
  let h0 = (fn |> Unix.stat).st_size in
  let ch = open_out_gen [Open_append;Open_binary;Open_wronly] 0 fn in
  write_record ch k v;
  close_out ch;
  f init (k |> hash,h0)

module IntMap = Map.Make(Int)
module IntSet = Set.Make(Int)

(** populates an array with 256 hashmaps hash -> pos set *)
let kv_add' a (has,pos) =
  let idx = has mod (a |> Array.length) in
  let m = a.(idx) in
  let l = (match m |> IntMap.find_opt has with
      | Some l -> l
      | None -> IntSet.empty) |> IntSet.add pos in
  a.(idx) <- m |> IntMap.add has l 

let write_hashes oc buck m =
  let p0 = oc |> pos_out in
  let c = ref 0 in
  m |> IntMap.iter (fun h ps ->
      ps |> IntSet.iter (fun p ->
          write4ble oc h;
          write4ble oc p;
          c := succ !c)
    );
  (* write pointer p0 count !c at the start of the file and jump back again *)
  let p1 = oc |> pos_out in
  buck * 8 |> seek_out oc;
  write4ble oc p0;
  write4ble oc !c;
  p1 |> seek_out oc

let update
    (keep : bytes * bytes -> bool)
    (fkt_add_n : (bytes * bytes -> unit) -> unit)
    fn =
  let fn' = fn ^ "~" in
  truncate fn';
  let a = IntMap.empty |> Array.make 256 in
  let db_write kv = append (fun _ -> kv_add' a) [] kv fn' in
  (* keep kv-pairs *)
  fold_left (fun _ (_,kv) ->
      if kv |> keep
      then db_write kv
    )
    () fn;
  (* add kv-pairs *)
  fkt_add_n db_write;
  (* write hashes and pointers *)
  let p0 = (Unix.stat fn').st_size in
  let oc = open_out_gen [Open_binary;Open_wronly] 0 fn' in
  seek_out oc p0;
  a |> Array.iteri (write_hashes oc);
  close_out oc;
  Unix.rename fn' fn
